{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-10-17T14:12:05.514612Z",
     "start_time": "2025-10-17T14:12:05.505054Z"
    }
   },
   "source": [
    "# 1. 导入库\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from matplotlib import rcParams\n",
    "rcParams['font.sans-serif'] = ['SimHei']"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-10-17T14:13:04.780636Z",
     "start_time": "2025-10-17T14:13:04.532701Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 2. 导入数据\n",
    "df = pd.read_csv('data/house_sales.csv')"
   ],
   "id": "29362239e1c9a5e5",
   "outputs": [],
   "execution_count": 3
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-10-17T14:14:43.373837Z",
     "start_time": "2025-10-17T14:14:43.320892Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# 3. 数据概览\n",
    "print('总记录数：',len(df))\n",
    "print('字段数量：',len(df.columns))\n",
    "df.head(5)\n",
    "df.info()"
   ],
   "id": "86b5a51a854678be",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "总记录数： 106118\n",
      "字段数量： 12\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 106118 entries, 0 to 106117\n",
      "Data columns (total 12 columns):\n",
      " #   Column      Non-Null Count   Dtype \n",
      "---  ------      --------------   ----- \n",
      " 0   city        106118 non-null  object\n",
      " 1   address     104452 non-null  object\n",
      " 2   area        105324 non-null  object\n",
      " 3   floor       104024 non-null  object\n",
      " 4   name        105564 non-null  object\n",
      " 5   price       105564 non-null  object\n",
      " 6   province    106118 non-null  object\n",
      " 7   rooms       104036 non-null  object\n",
      " 8   toward      105240 non-null  object\n",
      " 9   unit        105564 non-null  object\n",
      " 10  year        57736 non-null   object\n",
      " 11  origin_url  105564 non-null  object\n",
      "dtypes: object(12)\n",
      "memory usage: 9.7+ MB\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# 4. 数据清洗\n",
    "# 删除无用的数据列\n",
    "df.drop(columns='origin_url',inplace=True)\n",
    "# 5. 新数据特征构造\n",
    "# 6. 问题分析及可视化"
   ],
   "id": "2c99f3ef1e6b6a2a"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
